Sub-clustering of T cell clusters
t_clusters_obj@meta.data %>%
ggplot(aes(UMAP1, UMAP2, color = annotation)) +
geom_point(size = 0.75) +
scale_color_manual(values = color) +
labs(x = "UMAP1", y = "UMAP2", color = "") +
theme_classic()

metadata <- t_clusters_obj@meta.data
df = count(metadata,metadata$annotation)
colnames(df) = c("Cluster","Number_of_cells")
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
kable(df) %>%
kable_styling("striped", full_width = T)
|
Cluster
|
Number_of_cells
|
|
CD4_T
|
10205
|
|
Cytotoxic
|
1179
|
ADT and RNA multimodal analysis
DefaultAssay(t_clusters_obj) <- 'RNA'
t_clusters_obj <- NormalizeData(t_clusters_obj) %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA() %>%
RunHarmony(reduction = "pca",dims = 1:30,group.by.vars = "gemid",assay.use = "RNA",reduction.save = "harmony_RNA")
## Centering and scaling data matrix
## PC_ 1
## Positive: MKI67, TOP2A, NUSAP1, RRM2, STMN1, CENPF, KNL1, KIF23, HIST1H1B, BIRC5
## CDK1, NCAPH, ASPM, TPX2, KIFC1, CDCA8, PCLAF, DLGAP5, ZWINT, HMGB2
## HJURP, HIST1H3B, MCM7, CLSPN, DHFR, TK1, CCNB2, KIF15, FANCI, STIL
## Negative: MALAT1, TPT1, IL7R, CCR7, AL138963.4, RPLP1, MAML2, LEF1, NELL2, SELL
## CD40LG, LINC02446, PLAC8, LRRN3, GPRASP1, SAMHD1, SULT1B1, S1PR1, AOAH, TRAV8-4
## CTSL, PASK, TRAV8-1, CTSW, MTRNR2L12, RPL39, TRAV8-3, KLRC4, AC141424.1, S100B
## PC_ 2
## Positive: MKI67, TOP2A, KIF23, NUSAP1, CENPF, RRM2, HIST1H1B, CDCA8, NCAPH, CDK1
## HJURP, DLGAP5, BIRC5, TPX2, ASPM, HIST1H3B, ZWINT, KIFC1, KIF2C, KIF4A
## KNL1, ESCO2, STIL, FANCI, DTL, NUF2, NDC80, CENPE, HIST1H3C, TK1
## Negative: MS4A1, HLA-DRA, HLA-DRB1, HLA-DRB5, HLA-DPA1, HLA-DQB1, HLA-DQA1, CD79A, HLA-DPB1, CD74
## HLA-DQA2, CD22, IFI30, FCRLA, MEF2C, CYBB, VPREB3, HLA-DMB, CD83, TCL1A
## BANK1, CD19, NCF1, SWAP70, HLA-DMA, SYK, HVCN1, SMIM14, BLNK, CD40
## PC_ 3
## Positive: NKG7, CCL5, CTSW, GNLY, GZMA, GZMK, KLRD1, KLRK1, TMSB10, HOPX
## CST7, TYROBP, PECAM1, GZMB, XCL2, SAMD3, IL7R, ANXA1, FCER1G, XCL1
## CCL4, TPT1, PLEK, SLAMF7, KLRG1, GZMH, PRF1, S100B, AOAH, CRTAM
## Negative: TIGIT, TOX2, CTLA4, ITM2A, CXCR5, TBC1D4, TOX, IL21, ICA1, SRGN
## IKZF3, PDCD1, LIMS1, RNF19A, CD200, CXCL13, MAF, RAB27A, BCL6, BCL9L
## NPIPB5, HAL, DRAIC, CD82, ICOS, LINC01480, CEP128, PASK, POU2AF1, PKM
## PC_ 4
## Positive: NKG7, CCL5, GZMA, GNLY, CST7, GZMK, KLRD1, CCL4, CTSW, SRGN
## GZMB, HLA-B, HLA-A, PRF1, TYROBP, XCL2, HLA-C, HOPX, XCL1, GZMH
## TIGIT, FCER1G, PLEK, KLRK1, FASLG, SLAMF7, CXCR6, IL2RB, LYST, DTHD1
## Negative: TPT1, CCR7, SELL, LEF1, IL7R, MS4A1, RPLP0, AL138963.4, EEF2, HIST1H1D
## MKI67, KNL1, NUSAP1, TOP2A, MAML2, CD22, HIST1H1B, MEF2C, MALAT1, CYBB
## VPREB3, CDK1, KIF23, FCRLA, HLA-DRA, TCL1A, CD79A, BANK1, NPM1, LINC00926
## PC_ 5
## Positive: RPL8, RPS15, SLC25A6, RPLP1, PTMA, CORO1A, HLA-C, HLA-B, TMSB10, CFL1
## ACTB, CD7, ARPC1B, HLA-A, ACTG1, CHCHD2, CRIP1, GPX4, SELENOH, CCR7
## NME2, SERBP1, RPLP0, LSP1, UCP2, EMP3, RPL39, CNN2, NOSIP, HMGN2
## Negative: MALAT1, MTRNR2L12, MT-CO1, CCL5, NKG7, TOX, MT-ND6, MTRNR2L8, GZMK, GZMA
## NPIPB5, MT-ND5, TOX2, SH2D1A, CXCL13, MCTP1, DTHD1, SLAMF7, KLRD1, AL627171.2
## TIGIT, BCL6, IKZF3, CCL4, GNLY, FCRL3, LYST, PYHIN1, PTPN14, CTSW
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony 6/10
## Harmony 7/10
## Harmony 8/10
## Harmony 9/10
## Harmony 10/10
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.RNA.harmony_RNA; see ?make.names for more
## details on syntax validity
Check for Cell cycle stages variability between clusters
load(saved_cell_cycle_obj)
t_clusters_obj <- CellCycleScoring(t_clusters_obj,
g2m.features = g2m_genes,
s.features = s_genes)
DefaultAssay(t_clusters_obj) <- 'ADT'
# we will use all ADT features for dimensional reduction
# we set a dimensional reduction name to avoid overwriting the
VariableFeatures(t_clusters_obj) <- rownames(t_clusters_obj[["ADT"]])
t_clusters_obj <- NormalizeData(t_clusters_obj, normalization.method = 'CLR', margin = 2) %>%
ScaleData() %>% RunPCA(reduction.name = 'apca') %>%
RunHarmony(reduction = "apca",dims = 1:20, group.by.vars = "gemid", assay.use = "ADT", reduction.save = "harmony_protein")
## Normalizing across cells
## Centering and scaling data matrix
## PC_ 1
## Positive: CD138-(Syndecan-1), NLRP2.1, CD90-(Thy1), 0856-anti-Tau-Phospho-(Thr181), IgA, 1055-anti-c-Met, HLA-F.1, HLA-DR, CD133, CD11a
## CD193-(CCR3), CD69.1, IgG-Fc, CD15-(SSEA-1), CD309-(VEGFR2), CD45, TCR-gamma-delta, CD30, CD38.1, CD11b
## CD279-(PD-1), CD20, CD45RO, CD18, CD278-(ICOS), CD27.1, CD28.1, CD52.1, CD5.1, CD19.1
## Negative: CD66b, CD197-(CCR7), CD23, CD169-(Sialoadhesin--Siglec-1), CD303-(BDCA-2), CD274-(B7-H1--PD-L1), CD49f, CD370-(CLEC9A-DNGR1), CD267-(TACI), IgG2a--kappa-isotype-Ctrl
## CD106, CD124-(IL-4Ralpha), CD122-(IL-2Rbeta), CD144-(VE-Cadherin), CD324-(E-Cadherin), TCR-Valpha24-Jalpha18-(iNKT-cell), CD96-(TACTILE), CD223-(LAG-3), CD307d-(FcRL4), CD269-(BCMA)
## Mac-2-(Galectin-3), CD204, CD158f-(KIR2DL5), CD207.1, CD163.1, CD137-(4-1BB), CD319-(CRACC), CD178-(Fas-L), TSLPR-(TSLP-R), CD101-(BB27)
## PC_ 2
## Positive: Podoplanin, TIGIT-(VSTM3), CD82.1, CD35, CD54, CD95-(Fas), CD21, CD19.1, CD58-(LFA-3), CD107a-(LAMP-1)
## CD32, CD275-(B7-H2--ICOSL), CD278-(ICOS), CD69.1, CD45RO, CD279-(PD-1), TCR-Vbeta13.1, CD71, HLA-DR, CD70.1
## CD20, CX3CR1.1, CD40.1, CD272-(BTLA), CD336-(NKp44), CD38.1, CD268-(BAFF-R), CD326-(Ep-CAM), CD57-Recombinant, CD39
## Negative: CD8, IgG2b--kappa-isotype-Ctrl, CD49f, CD138-(Syndecan-1), CD90-(Thy1), CD11b, IgA, HLA-A2, 0856-anti-Tau-Phospho-(Thr181), CD309-(VEGFR2)
## CD15-(SSEA-1), CD7.1, TCR-Vdelta2, CD31, 1055-anti-c-Met, NLRP2.1, CD133, IgG-Fc, CD56-(NCAM), CD36.1
## TCR-Valpha7.2, CD127-(IL-7Ralpha), CD328-(Siglec-7), integrin-beta7, HLA-F.1, CD235ab, CD62L, CD66a-c-e, CD33.1, IgG2b--kappa-Isotype-Ctrl
## PC_ 3
## Positive: TCR-gamma-delta, CD138-(Syndecan-1), CD90-(Thy1), 0856-anti-Tau-Phospho-(Thr181), 1055-anti-c-Met, IgG-Fc, NLRP2.1, CD133, HLA-F.1, B7-H4
## IgA, CD154, CD309-(VEGFR2), CD15-(SSEA-1), CD193-(CCR3), CD337-(NKp30), CD336-(NKp44), CD11b, CD158f-(KIR2DL5), CD30
## CD269-(BCMA), CD137L-(4-1BB-Ligand), CD24.1, CD58-(LFA-3), CD303-(BDCA-2), Mac-2-(Galectin-3), Ig-light-chain-lambda, CX3CR1.1, CD70.1, CD124-(IL-4Ralpha)
## Negative: CD2.1, CD5.1, CD3, CD4.1, TCR-alpha-beta, CD99.1, CD52.1, CD45, CD28.1, CD7.1
## CD47.1, CD49f, CD62L, CD278-(ICOS), CD95-(Fas), CD27.1, CD45RO, CD127-(IL-7Ralpha), CD18, CD224
## CD26, CD194-(CCR4), CD82.1, CD305-(LAIR1), CD161, CD44.1, CD11a, CD71, CD25, CD38.1
## PC_ 4
## Positive: CD45RO, CD279-(PD-1), CD278-(ICOS), CD4.1, CD95-(Fas), CD28.1, TIGIT-(VSTM3), CD154, CD5.1, CD226-(DNAM-1)
## CD275-(B7-H2--ICOSL), CD57-Recombinant, CD303-(BDCA-2), CD69.1, 0856-anti-Tau-Phospho-(Thr181), TCR-Valpha24-Jalpha18-(iNKT-cell), CD90-(Thy1), CD133, CD30, CD138-(Syndecan-1)
## CD357-(GITR), 1055-anti-c-Met, CD124-(IL-4Ralpha), CD194-(CCR4), B7-H4, HLA-F.1, CD204, CD2.1, CD269-(BCMA), NLRP2.1
## Negative: CD45RA, CD21, CD35, CD32, CD19.1, CD73-(Ecto-5-nucleotidase), CD40.1, IgD, CD31, CD1c
## CD268-(BAFF-R), CD22.1, CD54, HLA-DR, CD8, CD39, IgM, CD20, CD305-(LAIR1), CD196-(CCR6)
## CD47.1, integrin-beta7, CD49d, CD11c, Ig-light-chain-lambda, CD71, CD101-(BB27), CD314-(NKG2D), CD94, HLA-A-B-C
## PC_ 5
## Positive: CD279-(PD-1), CD278-(ICOS), TIGIT-(VSTM3), CD69.1, CD95-(Fas), CD71, CD275-(B7-H2--ICOSL), CD57-Recombinant, CD45RO, CD54
## CD20, CD19.1, CD10, CD39, CD82.1, CD21, CD38.1, CD32, CD40.1, CD35
## CD146, Podoplanin, HLA-DR, CD1c, CD268-(BAFF-R), CD224, CD152-(CTLA-4), CD81-(TAPA-1), CD22.1, CD185-(CXCR5)
## Negative: CD45RA, CD8, CD127-(IL-7Ralpha), CD3, CD47.1, CD52.1, CD7.1, CD31, CD49f, CD99.1
## CD62L, TCR-alpha-beta, CD226-(DNAM-1), CD101-(BB27), CD5.1, CD314-(NKG2D), CD26, integrin-beta7, CD2.1, CD90-(Thy1)
## 0856-anti-Tau-Phospho-(Thr181), CD138-(Syndecan-1), CD305-(LAIR1), TCR-gamma-delta, CD154, IgG-Fc, CD133, 1055-anti-c-Met, CD45, CD44.1
## Warning: Cannot add objects with duplicate keys (offending key: PC_) setting key
## to original value 'apca_'
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony 6/10
## Harmony 7/10
## Harmony 8/10
## Harmony 9/10
## Harmony 10/10
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.ADT.harmony_protein; see ?make.names for more
## details on syntax validity
Multimodal Neighbor Identification
# Identify multimodal neighbors. These will be stored in the neighbors slot,
# and can be accessed using filtered_bcll.combined[['weighted.nn']]
# The WNN graph can be accessed at filtered_bcll.combined[["wknn"]],
# and the SNN graph used for clustering at filtered_bcll.combined[["wsnn"]]
# Cell-specific modality weights can be accessed at filtered_bcll.combined$RNA.weight
t_clusters_obj <- FindMultiModalNeighbors(
t_clusters_obj, reduction.list = list("harmony_RNA", "harmony_protein"),
dims.list = list(1:30, 1:20), modality.weight.name = "RNA.weight"
)
## Calculating cell-specific modality weights
## Finding 20 nearest neighbors for each modality.
## Calculating kernel bandwidths
## Warning in FindMultiModalNeighbors(t_clusters_obj, reduction.list =
## list("harmony_RNA", : The number of provided modality.weight.name is not
## equal to the number of modalities. RNA.weight ADT.weight are used to store the
## modality weights
## Finding multimodal neighbors
## Constructing multimodal KNN graph
## Constructing multimodal SNN graph
UMAP visualisation
feat_clust <- c(0.1,0.5,1,1.5,2)
t_clusters_obj <- RunUMAP(t_clusters_obj, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnn_UMAP_")
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:43:47 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:43:47 Commencing smooth kNN distance calibration using 1 thread
## 17:43:48 Initializing from normalized Laplacian + noise
## 17:43:48 Commencing optimization for 200 epochs, with 359320 positive edges
## 17:43:52 Optimization finished
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from wnn_UMAP_ to wnnUMAP_
## Warning: All keys should be one or more alphanumeric characters followed by an
## underscore '_', setting key to wnnUMAP_
t_clusters_obj <- FindClusters(t_clusters_obj, graph.name = "wsnn", algorithm = 3, resolution = feat_clust, verbose = FALSE)
Seurat::DimPlot(t_clusters_obj, reduction = 'wnn.umap', label = TRUE, repel = TRUE, label.size = 2.5, group.by = c("wsnn_res.0.1","wsnn_res.0.5","wsnn_res.1","wsnn_res.1.5","wsnn_res.2")) + NoLegend() & Seurat::NoLegend()

t_clusters_obj <- FindClusters(t_clusters_obj, graph.name = "wsnn", algorithm = 3, resolution = 0.5, verbose = FALSE)
t_clusters_obj <- RunUMAP(t_clusters_obj, reduction = 'pca', dims = 1:30, assay = 'RNA',
reduction.name = 'rna.umap', reduction.key = 'rnaUMAP_')
## 17:44:17 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:44:17 Read 11384 rows and found 30 numeric columns
## 17:44:17 Using Annoy for neighbor search, n_neighbors = 30
## 17:44:17 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:44:18 Writing NN index file to temp file /tmp/RtmptrqOre/file1062646aac50b7
## 17:44:18 Searching Annoy index using 1 thread, search_k = 3000
## 17:44:21 Annoy recall = 100%
## 17:44:21 Commencing smooth kNN distance calibration using 1 thread
## 17:44:21 Initializing from normalized Laplacian + noise
## 17:44:22 Commencing optimization for 200 epochs, with 490256 positive edges
## 17:44:25 Optimization finished
t_clusters_obj <- RunUMAP(t_clusters_obj, reduction = 'apca', dims = 1:18, assay = 'ADT',
reduction.name = 'adt.umap', reduction.key = 'adtUMAP_')
## 17:44:25 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:44:25 Read 11384 rows and found 18 numeric columns
## 17:44:25 Using Annoy for neighbor search, n_neighbors = 30
## 17:44:25 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:44:26 Writing NN index file to temp file /tmp/RtmptrqOre/file10626479e292ea
## 17:44:26 Searching Annoy index using 1 thread, search_k = 3000
## 17:44:29 Annoy recall = 100%
## 17:44:29 Commencing smooth kNN distance calibration using 1 thread
## 17:44:30 Initializing from normalized Laplacian + noise
## 17:44:30 Commencing optimization for 200 epochs, with 476590 positive edges
## 17:44:33 Optimization finished
p1 <- DimPlot(t_clusters_obj, reduction = 'rna.umap', label = TRUE,
repel = TRUE, label.size = 2.5) + NoLegend()
p2 <- DimPlot(t_clusters_obj, reduction = 'adt.umap', label = TRUE,
repel = TRUE, label.size = 2.5) + NoLegend()
p1

p2

Seurat::DimPlot(t_clusters_obj, reduction = 'wnn.umap', label = TRUE, repel = TRUE, label.size = 2.5, group.by = "wsnn_res.0.5")

metadata <- t_clusters_obj@meta.data
df = count(metadata,metadata$wsnn_res.0.5)
colnames(df) = c("Cluster","Number_of_cells")
library(knitr)
library(kableExtra)
kable(df) %>%
kable_styling("striped", full_width = T)
|
Cluster
|
Number_of_cells
|
|
0
|
2626
|
|
1
|
2613
|
|
10
|
230
|
|
11
|
186
|
|
12
|
144
|
|
13
|
136
|
|
14
|
82
|
|
15
|
63
|
|
16
|
41
|
|
2
|
1447
|
|
3
|
795
|
|
4
|
659
|
|
5
|
654
|
|
6
|
645
|
|
7
|
511
|
|
8
|
308
|
|
9
|
244
|